YouTube Analytics¶
In this notebook we explore the video content of the top 3 Telugu travel YouTubers.
YouTube Overview¶
In [ ]:
import pandas as pd
import plotly
import plotly.offline as pyo
import plotly.graph_objs as go
import plotly.express as px
# Switch Plotly to offline notebook rendering.
pyo.init_notebook_mode()

# Load the per-channel summary sheet and preview its first rows.
Ov = pd.read_excel(
    io=r"C:\Users\50510\Desktop\YouTube_Projects\YouTube_Overview.xlsx",
)
Ov.head()
Out[ ]:
| Channel_name | Subscribers | Views | Total_Videos | |
|---|---|---|---|---|
| 0 | Uma Telugu Traveller | 816000 | 168577301 | 510 |
| 1 | Ravi Telugu Traveller | 700000 | 215070028 | 719 |
| 2 | Naa Anveshana | 1440000 | 369601360 | 1021 |
In [ ]:
# Print the column dtypes and non-null counts of the overview frame.
Ov.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 3 entries, 0 to 2 Data columns (total 4 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Channel_name 3 non-null object 1 Subscribers 3 non-null int64 2 Views 3 non-null int64 3 Total_Videos 3 non-null int64 dtypes: int64(3), object(1) memory usage: 224.0+ bytes
In [ ]:
import plotly.express as px
# Subscribers per channel; colouring by channel name gives every bar its own colour.
fig = px.bar(
    data_frame=Ov,
    x='Channel_name',
    y='Subscribers',
    color='Channel_name',
    title='Subscribers by Channel',
)
fig.show()
In [ ]:
import plotly.express as px
# Number of posted videos per channel; colouring by channel name gives every bar its own colour.
fig = px.bar(
    data_frame=Ov,
    x='Channel_name',
    y='Total_Videos',
    color='Channel_name',
    title='Posted Videos by Channel',
)
fig.show()
In [ ]:
import plotly.express as px
# Total views per channel; colouring by channel name gives every bar its own colour.
fig = px.bar(
    data_frame=Ov,
    x='Channel_name',
    y='Views',
    color='Channel_name',
    title='Views by Channel',
)
fig.show()
In [ ]:
import plotly.express as px
# Views plotted against subscriber count, one coloured bar per channel.
# The title names both axes so this chart is not confused with the
# per-channel 'Views by Channel' chart, which plots channel names on x.
fig = px.bar(Ov, x='Subscribers', y='Views', title='Views vs Subscribers by Channel',
             color='Channel_name')
fig.show()
Content Insights¶
In [ ]:
# Load the cleaned per-video sheet and discard the 'Unnamed: 0' column
# (presumably an index column written by an earlier export — confirm).
df = pd.read_excel(
    r"C:\Users\50510\Desktop\YouTube_Projects\Cleaned_YouTube_Data.xlsx"
).drop(columns=['Unnamed: 0'])
df.head(1)
Out[ ]:
| Date_Published | Title | Tags | channel | Comments | viewCount | likeCount | favoriteCount | Cl_Text | English_Text | Text | Genre | personalities_ents | Geographical_locations | Nationalities | Locations | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2023-08-22 02:30:05+00:00 | Naa Anveshana meet up in Montreal and Vancouv... | ['Naa Anveshana meet up in Montreal and Vancou... | Naa Anveshana | 1169 | 333229 | 19698 | 0 | naa anveshana meet up in montreal and vancouver | Naa Anveshana meet up in Montreal and Vancouv... | naa anveshana meet up in  montreal and vancou... | National | naa anveshana | montreal , vancouver , canada | NaN | NaN |
In [ ]:
# Engagement is the unweighted sum of comments, views, likes and favourites
# for each video.
df['Engagement'] = (
    df['Comments']
    .add(df['viewCount'])
    .add(df['likeCount'])
    .add(df['favoriteCount'])
)
df.head()
Out[ ]:
| Date_Published | Title | Tags | channel | Comments | viewCount | likeCount | favoriteCount | Cl_Text | English_Text | Text | Genre | personalities_ents | Geographical_locations | Nationalities | Locations | Engagement | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2023-08-22 02:30:05+00:00 | Naa Anveshana meet up in Montreal and Vancouv... | ['Naa Anveshana meet up in Montreal and Vancou... | Naa Anveshana | 1169 | 333229 | 19698 | 0 | naa anveshana meet up in montreal and vancouver | Naa Anveshana meet up in Montreal and Vancouv... | naa anveshana meet up in  montreal and vancou... | National | naa anveshana | montreal , vancouver , canada | NaN | NaN | 354096 |
| 1 | 2023-08-19 04:52:05+00:00 | English Bay Beach Vancouver | Suspension bridg... | ['Canada Place', 'English Bay Beach Vancouver'... | Naa Anveshana | 2006 | 980291 | 51011 | 0 | english bay beach vancouver | suspension bridg... | English Bay Beach Vancouver | Suspension bridg... | english bay beach vancouver | suspension bridg... | World | NaN | vancouver , canada | NaN | english bay | 1033308 |
| 2 | 2023-08-17 03:47:03+00:00 | 48 hrs On Canada's Most Luxury train journey B... | ['Naaanveshana', 'banff', 'british columbia', ... | Naa Anveshana | 3774 | 1686573 | 74544 | 0 | 48 hrs on canada's most luxury train journey b... | 48 hrs On Canada's Most Luxury train journey B... | 48 hrs on canada 's most luxury train journey ... | World | NaN | canada , banff , vancouver | NaN | NaN | 1764891 |
| 3 | 2023-08-15 02:30:31+00:00 | Sulphur banff gondola | things to do in banff ... | ['Naaanveshana', 'Sulphur banff gondola', 'ban... | Naa Anveshana | 2462 | 2189913 | 63843 | 0 | sulphur banff gondola | things to do in banff ... | Sulphur banff gondola | things to do in banff ... | sulphur banff gondola | things to do in banff ... | Health&Nature | NaN | NaN | NaN | NaN | 2256218 |
| 4 | 2023-08-12 12:49:37+00:00 | Columbia ice field Glacier Adventure Jasper Na... | ['Naaanveshana', 'athabasca falls', 'athabasca... | Naa Anveshana | 3430 | 1481449 | 64606 | 0 | columbia ice field glacier adventure jasper na... | Columbia ice field Glacier Adventure Jasper Na... | columbia ice field glacier adventure jasper na... | Health&Nature | NaN | NaN | NaN | NaN | 1549485 |
In [ ]:
# Parse the publication timestamps into pandas datetimes for the time-based plots.
df['Date_Published'] = pd.to_datetime(df['Date_Published'])
In [ ]:
# Show the column names of the video-level frame.
df.columns
Out[ ]:
Index(['Date_Published', 'Title', 'Tags', 'channel', 'Comments', 'viewCount',
'likeCount', 'favoriteCount', 'Cl_Text', 'English_Text', 'Text',
'Genre', 'personalities_ents', 'Geographical_locations',
'Nationalities', 'Locations', 'Engagement'],
dtype='object')
In [ ]:
### Engagement trend over publication date
# One line per channel.
# NOTE(review): px.line joins points in row order, so this assumes df is
# already ordered by Date_Published within each channel — confirm.
fig = px.line(
    data_frame=df,
    x='Date_Published',
    y='Engagement',
    color='channel',
    title='Engagement Trend by Channel',
)
fig.show()
In [ ]:
### Comments trend over publication date
# One line per channel.
# NOTE(review): px.line joins points in row order, so this assumes df is
# already ordered by Date_Published within each channel — confirm.
fig = px.line(
    data_frame=df,
    x='Date_Published',
    y='Comments',
    color='channel',
    title='Comments Trend by Channel',
)
fig.show()
In [ ]:
### Likes trend over publication date
# One line per channel.
# NOTE(review): px.line joins points in row order, so this assumes df is
# already ordered by Date_Published within each channel — confirm.
fig = px.line(
    data_frame=df,
    x='Date_Published',
    y='likeCount',
    color='channel',
    title='Likes Trend by Channel',
)
fig.show()
In [ ]:
### View-count trend over publication date
# One line per channel.
# NOTE(review): px.line joins points in row order, so this assumes df is
# already ordered by Date_Published within each channel — confirm.
fig = px.line(
    data_frame=df,
    x='Date_Published',
    y='viewCount',
    color='channel',
    title='viewCount Trend by Channel',
)
fig.show()
In [ ]:
# Number of rows (videos) per channel in the video-level frame.
df['channel'].value_counts()
Out[ ]:
channel Naa Anveshana 1022 Ravi Telugu Traveller 719 Uma Telugu Traveller 510 Name: count, dtype: int64
Top Videos¶
In [ ]:
# Top 20 videos of the 'Naa Anveshana' channel, ranked by engagement.
anv = df.loc[df['channel'] == 'Naa Anveshana']
Anvesh_Top20_videos = anv.sort_values(by='Engagement', ascending=False).head(20)
# Backward-compatible alias: the frame was originally bound to this name even
# though it holds 20 rows, not 10.
Anvesh_Top10_videos = Anvesh_Top20_videos

# Bar chart with engagement on the x-axis and video titles on the y-axis.
# The title uses the channel name exactly as it appears in the data.
fig = px.bar(Anvesh_Top20_videos, x='Engagement', y='Title', color='channel',
             title='Top 20 Videos by Naa Anveshana')
fig.update_layout(xaxis_title='Engagement', yaxis_title='Video Title')
fig.show()
In [ ]:
# Restrict to the 'Ravi Telugu Traveller' channel and keep its 20 most
# engaging videos.
ravi = df[df['channel'] == 'Ravi Telugu Traveller']
ravi_Top20_videos = ravi.sort_values(by='Engagement', ascending=False).iloc[:20]

# Bar chart with engagement on the x-axis and video titles on the y-axis.
fig = px.bar(
    data_frame=ravi_Top20_videos,
    x='Engagement',
    y='Title',
    color='channel',
    title='Top 20 Videos by Ravi Telugu Traveller',
)
fig.update_layout(yaxis_title='Video Title', xaxis_title='Engagement')
fig.show()
In [ ]:
# Restrict to the 'Uma Telugu Traveller' channel and keep its 20 most
# engaging videos.
uma = df[df['channel'] == 'Uma Telugu Traveller']
uma_Top20_videos = uma.sort_values(by='Engagement', ascending=False).iloc[:20]

# Bar chart with engagement on the x-axis and video titles on the y-axis.
fig = px.bar(
    data_frame=uma_Top20_videos,
    x='Engagement',
    y='Title',
    color='channel',
    title='Top 20 Videos by Uma Telugu Traveller',
)
fig.update_layout(yaxis_title='Video Title', xaxis_title='Engagement')
fig.show()
In [ ]:
# Preview the first rows of the video-level frame, including 'Engagement'.
df.head()
Out[ ]:
| Date_Published | Title | Tags | channel | Comments | viewCount | likeCount | favoriteCount | Cl_Text | English_Text | Text | Genre | personalities_ents | Geographical_locations | Nationalities | Locations | Engagement | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2023-08-22 02:30:05+00:00 | Naa Anveshana meet up in Montreal and Vancouv... | ['Naa Anveshana meet up in Montreal and Vancou... | Naa Anveshana | 1169 | 333229 | 19698 | 0 | naa anveshana meet up in montreal and vancouver | Naa Anveshana meet up in Montreal and Vancouv... | naa anveshana meet up in  montreal and vancou... | National | naa anveshana | montreal , vancouver , canada | NaN | NaN | 354096 |
| 1 | 2023-08-19 04:52:05+00:00 | English Bay Beach Vancouver | Suspension bridg... | ['Canada Place', 'English Bay Beach Vancouver'... | Naa Anveshana | 2006 | 980291 | 51011 | 0 | english bay beach vancouver | suspension bridg... | English Bay Beach Vancouver | Suspension bridg... | english bay beach vancouver | suspension bridg... | World | NaN | vancouver , canada | NaN | english bay | 1033308 |
| 2 | 2023-08-17 03:47:03+00:00 | 48 hrs On Canada's Most Luxury train journey B... | ['Naaanveshana', 'banff', 'british columbia', ... | Naa Anveshana | 3774 | 1686573 | 74544 | 0 | 48 hrs on canada's most luxury train journey b... | 48 hrs On Canada's Most Luxury train journey B... | 48 hrs on canada 's most luxury train journey ... | World | NaN | canada , banff , vancouver | NaN | NaN | 1764891 |
| 3 | 2023-08-15 02:30:31+00:00 | Sulphur banff gondola | things to do in banff ... | ['Naaanveshana', 'Sulphur banff gondola', 'ban... | Naa Anveshana | 2462 | 2189913 | 63843 | 0 | sulphur banff gondola | things to do in banff ... | Sulphur banff gondola | things to do in banff ... | sulphur banff gondola | things to do in banff ... | Health&Nature | NaN | NaN | NaN | NaN | 2256218 |
| 4 | 2023-08-12 12:49:37+00:00 | Columbia ice field Glacier Adventure Jasper Na... | ['Naaanveshana', 'athabasca falls', 'athabasca... | Naa Anveshana | 3430 | 1481449 | 64606 | 0 | columbia ice field glacier adventure jasper na... | Columbia ice field Glacier Adventure Jasper Na... | columbia ice field glacier adventure jasper na... | Health&Nature | NaN | NaN | NaN | NaN | 1549485 |
In [ ]:
# Show the column names of the video-level frame.
df.columns
Out[ ]:
Index(['Date_Published', 'Title', 'Tags', 'channel', 'Comments', 'viewCount',
'likeCount', 'favoriteCount', 'Cl_Text', 'English_Text', 'Text',
'Genre', 'personalities_ents', 'Geographical_locations',
'Nationalities', 'Locations', 'Engagement'],
dtype='object')
Most Trended Nationalities¶
In [ ]:
# Split the comma-separated 'Nationalities' column into one row per
# (video, nationality) pair so engagement can be aggregated per nationality.


def remove_duplicate_words(text):
    """Return *text* with repeated comma-separated entries removed.

    Each entry is stripped of surrounding whitespace and empty entries are
    dropped. First-seen order is kept, so the result is reproducible from run
    to run (joining a ``set`` yields an arbitrary order).
    """
    entries = (entry.strip() for entry in text.split(','))
    return ', '.join(dict.fromkeys(entry for entry in entries if entry))


# astype('str') turns missing values into the literal string 'nan'; those
# rows are filtered out again below once the column has been exploded.
df['Nationalities'] = (
    df['Nationalities'].astype('str').str.strip().apply(remove_duplicate_words)
)

# One row per nationality, still carrying the index of its source video.
persons_df = df['Nationalities'].str.split(', ').explode().to_frame('Nationals')

# Attach the per-video metrics to every exploded nationality.
result = df.drop('Nationalities', axis=1).join(persons_df)
result = result.dropna(subset=['Nationals'])
# Drop the 'nan' placeholder and empty labels. The .copy() makes the frame
# independent, so the assignment below does not raise pandas'
# SettingWithCopyWarning on a filtered slice.
result = result.loc[~result['Nationals'].isin(['nan', ''])].copy()
result['Nationals'] = result['Nationals'].str.strip()
Nationals = result[['Date_Published', 'channel', 'Comments', 'viewCount',
                    'likeCount', 'Engagement', 'Nationals']].reset_index(drop=True)
In [ ]:
# Number of (video, nationality) rows per channel.
Nationals['channel'].value_counts()
Out[ ]:
channel Naa Anveshana 106 Ravi Telugu Traveller 89 Uma Telugu Traveller 58 Name: count, dtype: int64
Top Nationalities Videos in Naa Anveshana¶
In [ ]:
# Keep only the nationality rows that belong to the 'Naa Anveshana' channel.
Anveshana = Nationals.loc[Nationals['channel'] == 'Naa Anveshana']
In [ ]:
# Total engagement per nationality for the 'Naa Anveshana' rows.
nationality_engagement = Anveshana.groupby('Nationals')['Engagement'].sum().reset_index()
# Keep the 20 nationalities with the highest summed engagement.
top_20_nationalities = nationality_engagement.sort_values(by='Engagement', ascending=False).head(20)
# Bar chart with nationalities on the y-axis and summed engagement on the x-axis.
# The title spells the channel the way it appears in the data ('Naa Anveshana').
fig = px.bar(top_20_nationalities, y='Nationals', x='Engagement', color='Nationals',
             title='Top 20 Trended Nationalities Videos by Engagement in Naa Anveshana')
# Left as the last expression so the notebook renders the returned figure.
fig.update_layout(yaxis_title='Nationality', xaxis_title='Engagement')
Top Nationalities Videos in Ravi Telugu Traveller¶
In [ ]:
# Nationality rows that belong to the 'Ravi Telugu Traveller' channel.
Ravi = Nationals[Nationals['channel'] == 'Ravi Telugu Traveller']
# Summed engagement per nationality.
nationality_engagement = Ravi.groupby('Nationals', as_index=False)['Engagement'].sum()
# The 20 nationalities with the highest summed engagement.
top_20_nationalities = nationality_engagement.sort_values(
    by='Engagement', ascending=False
).iloc[:20]
# Bar chart with nationalities on the y-axis and summed engagement on the x-axis.
fig = px.bar(
    data_frame=top_20_nationalities,
    x='Engagement',
    y='Nationals',
    color='Nationals',
    title='Top 20 Trended Nationalities Videos by Engagement in Ravi Telugu Traveller',
)
# Left as the last expression so the notebook renders the returned figure.
fig.update_layout(xaxis_title='Engagement', yaxis_title='Nationality')
Top Nationalities Videos in Uma Telugu Traveller¶
In [ ]:
# Nationality rows that belong to the 'Uma Telugu Traveller' channel.
Uma = Nationals[Nationals['channel'] == 'Uma Telugu Traveller']
# Summed engagement per nationality.
nationality_engagement = Uma.groupby('Nationals', as_index=False)['Engagement'].sum()
# The 20 nationalities with the highest summed engagement.
top_20_nationalities = nationality_engagement.sort_values(
    by='Engagement', ascending=False
).iloc[:20]
# Bar chart with nationalities on the y-axis and summed engagement on the x-axis.
fig = px.bar(
    data_frame=top_20_nationalities,
    x='Engagement',
    y='Nationals',
    color='Nationals',
    title='Top 20 Trended Nationalities Videos by Engagement in Uma Telugu Traveller',
)
# Left as the last expression so the notebook renders the returned figure.
fig.update_layout(xaxis_title='Engagement', yaxis_title='Nationality')
In [ ]:
# Show the column names of the video-level frame.
df.columns
Out[ ]:
Index(['Date_Published', 'Title', 'Tags', 'channel', 'Comments', 'viewCount',
'likeCount', 'favoriteCount', 'Cl_Text', 'English_Text', 'Text',
'Genre', 'personalities_ents', 'Geographical_locations',
'Nationalities', 'Locations', 'Engagement'],
dtype='object')
Top Locations in YouTube Videos¶
In [ ]:
# Split the comma-separated 'Locations' column into one row per
# (video, location) pair so engagement can be aggregated per location.


def remove_duplicate_words(text):
    """Return *text* with repeated comma-separated entries removed.

    Each entry is stripped of surrounding whitespace and empty entries are
    dropped. First-seen order is kept, so the result is reproducible from run
    to run (joining a ``set`` yields an arbitrary order).
    """
    entries = (entry.strip() for entry in text.split(','))
    return ', '.join(dict.fromkeys(entry for entry in entries if entry))


# astype('str') turns missing values into the literal string 'nan'; those
# rows are filtered out again below once the column has been exploded.
df['Locations'] = (
    df['Locations'].astype('str').str.strip().apply(remove_duplicate_words)
)

# One row per location, still carrying the index of its source video.
persons_df = df['Locations'].str.split(', ').explode().to_frame('Location')

# Attach the per-video metrics to every exploded location.
result = df.drop('Locations', axis=1).join(persons_df)
result = result.dropna(subset=['Location'])
# Drop the 'nan' placeholder and empty labels. The .copy() makes the frame
# independent, so the assignment below does not raise pandas'
# SettingWithCopyWarning on a filtered slice.
result = result.loc[~result['Location'].isin(['nan', ''])].copy()
result['Location'] = result['Location'].str.strip()
Location = result[['Date_Published', 'channel', 'Comments', 'viewCount',
                   'likeCount', 'Engagement', 'Location']].reset_index(drop=True)
# Number of (video, location) rows per channel.
Location['channel'].value_counts()
Out[ ]:
channel Naa Anveshana 75 Uma Telugu Traveller 56 Ravi Telugu Traveller 21 Name: count, dtype: int64
In [ ]:
# Location rows that belong to the 'Naa Anveshana' channel.
Anveshana = Location.loc[Location['channel'] == 'Naa Anveshana']
# Total engagement per location. The variable names are kept from the
# nationality analysis, but they hold location data here.
nationality_engagement = Anveshana.groupby('Location')['Engagement'].sum().reset_index()
# Keep the 20 locations with the highest summed engagement.
top_20_nationalities = nationality_engagement.sort_values(by='Engagement', ascending=False).head(20)
# Bar chart with locations on the y-axis and summed engagement on the x-axis.
# The title spells the channel the way it appears in the data ('Naa Anveshana').
fig = px.bar(top_20_nationalities, y='Location', x='Engagement', color='Location',
             title='Top 20 Trended Locations Videos by Engagement in Naa Anveshana')
# Left as the last expression so the notebook renders the returned figure.
fig.update_layout(yaxis_title='Areas', xaxis_title='Engagement')
In [ ]:
# Location rows that belong to the 'Ravi Telugu Traveller' channel.
Ravi = Location[Location['channel'] == 'Ravi Telugu Traveller']
# Summed engagement per location (variable names are kept from the
# nationality analysis, but they hold location data here).
nationality_engagement = Ravi.groupby('Location', as_index=False)['Engagement'].sum()
# The 20 locations with the highest summed engagement.
top_20_nationalities = nationality_engagement.sort_values(
    by='Engagement', ascending=False
).iloc[:20]
# Bar chart with locations on the y-axis and summed engagement on the x-axis.
fig = px.bar(
    data_frame=top_20_nationalities,
    x='Engagement',
    y='Location',
    color='Location',
    title='Top 20 Trended Locations Videos by Engagement in Ravi Telugu Traveller',
)
# Left as the last expression so the notebook renders the returned figure.
fig.update_layout(xaxis_title='Engagement', yaxis_title='Areas')
In [ ]:
# Location rows that belong to the 'Uma Telugu Traveller' channel.
Uma = Location[Location['channel'] == 'Uma Telugu Traveller']
# Summed engagement per location (variable names are kept from the
# nationality analysis, but they hold location data here).
nationality_engagement = Uma.groupby('Location', as_index=False)['Engagement'].sum()
# The 20 locations with the highest summed engagement.
top_20_nationalities = nationality_engagement.sort_values(
    by='Engagement', ascending=False
).iloc[:20]
# Bar chart with locations on the y-axis and summed engagement on the x-axis.
fig = px.bar(
    data_frame=top_20_nationalities,
    x='Engagement',
    y='Location',
    color='Location',
    title='Top 20 Trended Locations Videos by Engagement in Uma Telugu Traveller',
)
# Left as the last expression so the notebook renders the returned figure.
fig.update_layout(xaxis_title='Engagement', yaxis_title='Areas')
Top trended Geographical_locations in YouTube Videos¶
In [ ]:
# Split the comma-separated 'Geographical_locations' column into one row per
# (video, area) pair so engagement can be aggregated per area.


def remove_duplicate_words(text):
    """Return *text* with repeated comma-separated entries removed.

    Each entry is stripped of surrounding whitespace and empty entries are
    dropped. First-seen order is kept, so the result is reproducible from run
    to run (joining a ``set`` yields an arbitrary order).
    """
    entries = (entry.strip() for entry in text.split(','))
    return ', '.join(dict.fromkeys(entry for entry in entries if entry))


# astype('str') turns missing values into the literal string 'nan'; those
# rows are filtered out again below once the column has been exploded.
df['Geographical_locations'] = (
    df['Geographical_locations'].astype('str').str.strip().apply(remove_duplicate_words)
)

# One row per area, still carrying the index of its source video.
persons_df = df['Geographical_locations'].str.split(', ').explode().to_frame('Area')

# Attach the per-video metrics to every exploded area.
result = df.drop('Geographical_locations', axis=1).join(persons_df)
result = result.dropna(subset=['Area'])
# Drop the 'nan' placeholder and empty labels. The .copy() makes the frame
# independent, so the assignment below does not raise pandas'
# SettingWithCopyWarning on a filtered slice.
result = result.loc[~result['Area'].isin(['nan', ''])].copy()
result['Area'] = result['Area'].str.strip()
Area = result[['Date_Published', 'channel', 'Comments', 'viewCount',
               'likeCount', 'Engagement', 'Area']].reset_index(drop=True)
# Number of (video, area) rows per channel.
Area['channel'].value_counts()
Out[ ]:
channel Naa Anveshana 910 Ravi Telugu Traveller 590 Uma Telugu Traveller 579 Name: count, dtype: int64
In [ ]:
# Area rows that belong to the 'Naa Anveshana' channel.
Anveshana = Area.loc[Area['channel'] == 'Naa Anveshana']
# Total engagement per area. The variable names are kept from the
# nationality analysis, but they hold area data here.
nationality_engagement = Anveshana.groupby('Area')['Engagement'].sum().reset_index()
# Keep the 20 areas with the highest summed engagement.
top_20_nationalities = nationality_engagement.sort_values(by='Engagement', ascending=False).head(20)
# Bar chart with areas on the y-axis and summed engagement on the x-axis.
# The title spells the channel the way it appears in the data ('Naa Anveshana').
fig = px.bar(top_20_nationalities, y='Area', x='Engagement', color='Area',
             title='Top 20 Trended Areas Videos by Engagement in Naa Anveshana')
# Left as the last expression so the notebook renders the returned figure.
fig.update_layout(yaxis_title='Areas', xaxis_title='Engagement')
In [ ]:
# Area rows that belong to the 'Ravi Telugu Traveller' channel.
Ravi = Area[Area['channel'] == 'Ravi Telugu Traveller']
# Summed engagement per area (variable names are kept from the nationality
# analysis, but they hold area data here).
nationality_engagement = Ravi.groupby('Area', as_index=False)['Engagement'].sum()
# The 20 areas with the highest summed engagement.
top_20_nationalities = nationality_engagement.sort_values(
    by='Engagement', ascending=False
).iloc[:20]
# Bar chart with areas on the y-axis and summed engagement on the x-axis.
fig = px.bar(
    data_frame=top_20_nationalities,
    x='Engagement',
    y='Area',
    color='Area',
    title='Top 20 Trended Areas Videos by Engagement in Ravi Telugu Traveller ',
)
# Left as the last expression so the notebook renders the returned figure.
fig.update_layout(xaxis_title='Engagement', yaxis_title='Areas')
In [ ]:
# Area rows that belong to the 'Uma Telugu Traveller' channel.
Uma = Area[Area['channel'] == 'Uma Telugu Traveller']
# Summed engagement per area (variable names are kept from the nationality
# analysis, but they hold area data here).
nationality_engagement = Uma.groupby('Area', as_index=False)['Engagement'].sum()
# The 20 areas with the highest summed engagement.
top_20_nationalities = nationality_engagement.sort_values(
    by='Engagement', ascending=False
).iloc[:20]
# Bar chart with areas on the y-axis and summed engagement on the x-axis.
fig = px.bar(
    data_frame=top_20_nationalities,
    x='Engagement',
    y='Area',
    color='Area',
    title='Top 20 Trended Areas Videos by Engagement in Uma Telugu Traveller ',
)
# Left as the last expression so the notebook renders the returned figure.
fig.update_layout(xaxis_title='Engagement', yaxis_title='Areas')